Code
siteinfo <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_SiteInformation.csv")
siteinfo_sp <- st_as_sf(siteinfo, coords = c("long", "lat"), crs = 4326)Purpose: Quantify the suitability of existing modeling techniques for predicting streamflow in headwater systems.
Approach:
Site information
siteinfo <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_SiteInformation.csv")
siteinfo_sp <- st_as_sf(siteinfo, coords = c("long", "lat"), crs = 4326)Little g’s
dat_clean <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/Qualitative/LittleG_data_clean.csv")Big G’s
dat_clean_big <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/Qualitative/BigG_data_clean.csv")Climate
climdf <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/Qualitative/Daymet_climate.csv")
climdf_summ <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/Qualitative/Daymet_climate_summary.csv")Water availability
wateravail <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/Qualitative/BigG_wateravailability_annual.csv")Write out point shape files for each state to feed into Stream Stats batch processor
siteinfo_sp_wy <- siteinfo_sp %>% filter(region == "Snake")
st_write(siteinfo_sp_wy, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/points/points_wy.shp")
siteinfo_sp_mt <- siteinfo_sp %>% filter(region %in% c("Flat", "Shields"))
st_write(siteinfo_sp_mt, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/points/points_mt.shp")
siteinfo_sp_ma <- siteinfo_sp %>% filter(region == "Mass")
st_write(siteinfo_sp_ma, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/points/points_ma.shp")
siteinfo_sp_va <- siteinfo_sp %>% filter(region %in% c("Shen"))
st_write(siteinfo_sp_va, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/points/points_va.shp")
siteinfo_sp_or <- siteinfo_sp %>% filter(region %in% c("Oreg"))
st_write(siteinfo_sp_or, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/points/points_or.shp")List geodatabase layer names
st_layers(dsn = "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_mt7617/points_mt7617.gdb")Driver: OpenFileGDB
Available layers:
layer_name geometry_type features fields crs_name
1 GlobalWatershedPoint Point 39 8 WGS 84
2 GlobalWatershed Multi Polygon 39 28 WGS 84
3 CHARACTERISTICS NA 1921 11 <NA>
4 FLOWSTATS NA 6763 16 <NA>
Read watershed boundaries
sheds_montana <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_mt7617/points_mt7617.gdb", layer = "GlobalWatershed")Reading layer `GlobalWatershed' from data source
`C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_mt7617\points_mt7617.gdb'
using driver `OpenFileGDB'
Simple feature collection with 39 features and 28 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -114.8904 ymin: 43.9457 xmax: -109.7226 ymax: 49.46148
Geodetic CRS: WGS 84
sheds_massach <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_ma7625/points_ma7625.gdb", layer = "GlobalWatershed")Reading layer `GlobalWatershed' from data source
`C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_ma7625\points_ma7625.gdb'
using driver `OpenFileGDB'
Simple feature collection with 13 features and 25 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -72.82306 ymin: 42.4123 xmax: -72.62871 ymax: 42.54973
Geodetic CRS: WGS 84
sheds_oregon <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_or7626/points_or7626.gdb", layer = "GlobalWatershed")Reading layer `GlobalWatershed' from data source
`C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_or7626\points_or7626.gdb'
using driver `OpenFileGDB'
Simple feature collection with 7 features and 41 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -118.9295 ymin: 42.48917 xmax: -118.561 ymax: 42.79204
Geodetic CRS: WGS 84
sheds_virginia <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_va7627/points_va7627.gdb", layer = "GlobalWatershed")Reading layer `GlobalWatershed' from data source
`C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_va7627\points_va7627.gdb'
using driver `OpenFileGDB'
Simple feature collection with 32 features and 20 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -79.24034 ymin: 37.88165 xmax: -78.02949 ymax: 38.7622
Geodetic CRS: WGS 84
sheds_wyoming <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_wy7628/points_wy7628.gdb", layer = "GlobalWatershed")Reading layer `GlobalWatershed' from data source
`C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_wy7628\points_wy7628.gdb'
using driver `OpenFileGDB'
Simple feature collection with 14 features and 17 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -110.5241 ymin: 43.64373 xmax: -110.1598 ymax: 43.87029
Geodetic CRS: WGS 84
sheds <- bind_rows(sheds_massach, sheds_montana, sheds_oregon, sheds_virginia, sheds_wyoming)
mapview(sheds) Find sites that were delineated incorrectly
options(scipen=999)
badsites <- tibble(sheds) %>% select(Name, Shape_Area, DRNAREA, ELEV) %>% rename(site_id = Name) %>% left_join(siteinfo %>% select(site_id, site_name, area_sqmi, elev_ft)) %>% select(site_id, site_name, DRNAREA, area_sqmi) %>% mutate(percerror = (DRNAREA - area_sqmi) / area_sqmi) %>% filter(percerror >= 0.15 | percerror <= -0.15)
badsites# A tibble: 15 × 5
site_id site_name DRNAREA area_sqmi percerror
<chr> <chr> <dbl> <dbl> <dbl>
1 WW West Whately Brook 0.0399 0.493 -0.919
2 WL West Brook Lower 0.086 8.51 -0.990
3 JB Jimmy Brook 7.87 0.974 7.08
4 SH08 Shields River ab Dugout 11.1 8.68 0.279
5 SH06 Lodgepole Creek 2.2 1.36 0.619
6 SH05 Dugout Creek 11.1 2.39 3.64
7 BIG_002 LangfordCreekLower 0.1 3.99 -0.975
8 RAP Rapidan River NWIS 0.0000386 115 -1.00
9 PI_09FL Piney River 09 0.36 4.28 -0.916
10 LEI Leidy Creek Mouth NWIS 0.000811 5.17 -1.00
11 PCM Pacific Creek at Moran NWIS 0.34 166. -0.998
12 SP10 SF Spread Creek Upper 0.000348 35.1 -1.00
13 SP09 SF Spread Creek Lower 72 44.3 0.627
14 SP08 Rock Creek 0.0000772 4.74 -1.00
15 SP03 Leidy Creek Lower 0.00112 5.17 -1.00
Stream stats site information
streamstats_info <- tibble(sheds) %>% select(Name, DRNAREA) %>% rename(site_id = Name) %>% left_join(siteinfo %>% select(site_id, site_name))
# streamstats_infoRead flow statistics from geodatabases
montana <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_mt7617/points_mt7617.gdb", layer = "FLOWSTATS")Reading layer `FLOWSTATS' from data source
`C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_mt7617\points_mt7617.gdb'
using driver `OpenFileGDB'
massach <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_ma7625/points_ma7625.gdb", layer = "FLOWSTATS")Reading layer `FLOWSTATS' from data source
`C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_ma7625\points_ma7625.gdb'
using driver `OpenFileGDB'
oregon <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_or7626/points_or7626.gdb", layer = "FLOWSTATS")Reading layer `FLOWSTATS' from data source
`C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_or7626\points_or7626.gdb'
using driver `OpenFileGDB'
virginia <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_va7627/points_va7627.gdb", layer = "FLOWSTATS")Reading layer `FLOWSTATS' from data source
`C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_va7627\points_va7627.gdb'
using driver `OpenFileGDB'
wyoming <- st_read("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/StreamStats/points_wy7628/points_wy7628.gdb", layer = "FLOWSTATS")Reading layer `FLOWSTATS' from data source
`C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\EcoDrought\EcoDrought Working\EcoDrought-Analysis\CompareModeledQ\StreamStats\points_wy7628\points_wy7628.gdb'
using driver `OpenFileGDB'
streamstats <- bind_rows(montana, massach, oregon, virginia, wyoming) %>% filter(!Name %in% c(badsites$site_id)) %>% rename(site_id = Name) %>% left_join(siteinfo %>% select(site_id, site_name)) %>% left_join(streamstats_info)
head(streamstats) site_id RegionID RegionName AreaPercent AreaSqMi
1 NFF GC1906 Crippen_Bue_Region_13 61 947.3549
2 NFF GC1828 USA_Bieger_2015 60 937.1283
3 NFF GC1828 USA_Bieger_2015 60 937.1283
4 NFF GC1828 USA_Bieger_2015 60 937.1283
5 NFF GC1818 Northern_Rocky_Mountains_P_Bieger_2015 60 937.1283
6 NFF GC1818 Northern_Rocky_Mountains_P_Bieger_2015 60 937.1283
StatLabel StatName Value
1 PKMAX_CB_R Maximum Flood Crippen Bue Regional 2.46e+05
2 XABNKF_U_B Bieger_USA_channel_cross_sectional_area 9.05e+02
3 DBANKF_U_B Bieger_USA_channel_depth 5.77e+00
4 WBANKF_U_B Bieger_USA_channel_width 1.65e+02
5 XABNKF_P_B Bieger_P_channel_cross_sectional_area 8.52e+02
6 DBANKF_P_B Bieger_P_channel_depth 5.53e+00
Units Years PIl PIu SE SEp PC CitationID
1 cubic feet per second NA NA NA NA NA NA 186
2 square feet 0 NA NA NA NA NA 160
3 feet 0 NA NA NA NA NA 160
4 feet 0 NA NA NA NA NA 160
5 square feet 0 NA NA NA NA NA 160
6 feet 0 NA NA NA NA NA 160
site_name DRNAREA
1 North Fork Flathead River NWIS 1556.2
2 North Fork Flathead River NWIS 1556.2
3 North Fork Flathead River NWIS 1556.2
4 North Fork Flathead River NWIS 1556.2
5 North Fork Flathead River NWIS 1556.2
6 North Fork Flathead River NWIS 1556.2
View provided flow statistics for each state. What is relevant to this paper?
print("MONTANA")
unique(montana$StatName)
print("MASSACHUSETTS")
unique(massach$StatName)
print("OREGON")
unique(oregon$StatName)
print("VIRGINA")
unique(virginia$StatName)
print("WYOMING")
unique(wyoming$StatName)Availability of flow statistics varies greatly by state. What statistics do all states have in common (minus Wyoming)? Only 7 day 10 year low flow is relevant.
Reduce(intersect, list(unique(montana$StatName), unique(massach$StatName), unique(oregon$StatName), unique(virginia$StatName))) [1] "Maximum Flood Crippen Bue Regional"
[2] "Bieger_USA_channel_cross_sectional_area"
[3] "Bieger_USA_channel_depth"
[4] "Bieger_USA_channel_width"
[5] "Bieger_P_channel_cross_sectional_area"
[6] "Bieger_P_channel_depth"
[7] "Bieger_P_channel_width"
[8] "Bieger_D_channel_cross_sectional_area"
[9] "Bieger_D_channel_depth"
[10] "Bieger_D_channel_width"
[11] "7 Day 10 Year Low Flow"
For the West Brook, plot (annual) observed and StreamStats exceedance/duration curves and calculate absolute error
# set up
vars <- unique(massach$StatName)[grep("Duration", unique(massach$StatName))][-1]
sites <- c("Avery Brook", "Sanderson Brook", "Mitchell Brook", "Obear Brook Lower", "West Brook NWIS", "West Brook Upper")
preds <- list()
exceed <- list()
joined <- list()
joined_full <- list()
# calcualate
for (i in 1:length(sites)) {
obs <- dat_clean %>% filter(site_name == sites[i])
# stream stats duration
p <- streamstats %>%
filter(site_name == unique(obs$site_name), StatName %in% vars) %>%
mutate(exceedance = parse_number(StatName)) %>%
mutate(flow_cms = Value*0.02831683199881, area_sqkm = DRNAREA*2.58999)
p <- add_daily_yield(data = p %>% select(site_id, site_name, DRNAREA, area_sqkm, StatName, exceedance, flow_cms), values = flow_cms, basin_area = as.numeric(unique(p$area_sqkm)))
p <- p %>% mutate(logYield = log(Yield_mm))
preds[[i]] <- p
# calculate exceedance probability by site
exceeddat <- obs %>%
filter(!is.na(logYield)) %>%
arrange(desc(logYield)) %>%
mutate(exceedance = 100/length(logYield)*1:length(logYield))
exceed[[i]] <- exceeddat
# join observed and streamstats exceedance, calculate error
j <- exceeddat %>%
select(site_name, exceedance, logYield) %>%
mutate(exceedance = round(exceedance, digits = 0)) %>%
group_by(site_name, exceedance) %>%
summarize(logYield = mean(logYield)) %>%
ungroup() %>%
left_join(p %>%
select(site_name, exceedance, logYield) %>%
rename(logYield_ss = logYield)) %>%
mutate(error_abs = logYield_ss - logYield,
error_abs_real = exp(logYield_ss) - exp(logYield),
error_rel = (exp(logYield_ss) - exp(logYield)) / exp(logYield))
joined[[i]] <- j %>% filter(!is.na(error_abs))
joined_full[[i]] <- j
}
preds <- do.call(rbind, preds)
exceed <- do.call(rbind, exceed)
joined <- do.call(rbind, joined)
joined_full <- do.call(rbind, joined_full)
joined_mean <- joined %>% group_by(exceedance) %>% summarize(error_abs = mean(error_abs, na.rm = TRUE))
# preds <- preds %>% mutate(site_name = factor(site_name, levels = sites))
# exceed <- exceed %>% mutate(site_name = factor(site_name, levels = sites))
# joined <- joined %>% mutate(site_name = factor(site_name, levels = sites))
# calculate among size variation for StreamStats and observed exceedance
vardat_ss <- joined %>% group_by(exceedance) %>% summarize(exdsd = sd(logYield_ss))
vardat_obs <- joined_full %>% group_by(exceedance) %>% summarize(exdsd = sd(logYield))
# tibble for site labels
siteslabs <- tibble(site_name = sites)### Colored by site
# exceedance curves
p1 <- ggplot() +
geom_line(data = exceed, aes(x = exceedance, y = logYield, color = site_name), size = 1) +
geom_line(data = preds, aes(x = exceedance, y = logYield), color = "black") +
geom_point(data = preds, aes(x = exceedance, y = logYield), color = "black") +
geom_text(data = siteslabs, aes(x = Inf, y = Inf, label = site_name), vjust = 1.5, hjust = 1.05, size = 3) +
facet_wrap(~site_name, nrow = 3) +
xlab("Exceedance probability") + ylab("log(Yield, mm)") +
theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.grid = element_blank(), strip.text.x = element_blank(), strip.background = element_blank(),
legend.position = "none")
# absolute error
p2 <- ggplot(data = joined) +
geom_line(aes(x = exceedance, y = error_abs, group = site_name, color = site_name)) +
geom_point(aes(x = exceedance, y = error_abs, group = site_name, color = site_name)) +
geom_abline(intercept = 0, slope = 0, linetype = "dashed") +
geom_line(data = joined_mean, aes(x = exceedance, y = error_abs), size = 1) +
xlab("Exceedance probability") + ylab("Absolute error") + ylim(-2.1,0) +
theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
legend.position = "none")
# combine
annotate_figure(ggarrange(p1, p2, ncol = 2, labels = "auto"), top = text_grob("The West Brook, Massachusetts"))
### No Color
# exceedance curves
p1 <- ggplot() +
geom_line(data = exceed, aes(x = exceedance, y = logYield), size = 1) +
geom_line(data = preds, aes(x = exceedance, y = logYield), color = "red") +
geom_point(data = preds, aes(x = exceedance, y = logYield), color = "red") +
geom_text(data = siteslabs, aes(x = Inf, y = Inf, label = site_name), vjust = 1.5, hjust = 1.05, size = 3) +
facet_wrap(~site_name, nrow = 3) +
xlab("Exceedance probability") + ylab("log(Yield, mm)") +
theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.grid = element_blank(), strip.text.x = element_blank(), strip.background = element_blank(),
legend.position = "none")
# absolute error
p2 <- ggplot(data = joined) +
geom_line(aes(x = exceedance, y = error_abs, group = site_name)) +
geom_point(aes(x = exceedance, y = error_abs, group = site_name)) +
geom_abline(intercept = 0, slope = 0, linetype = "dashed") +
geom_line(data = joined_mean, aes(x = exceedance, y = error_abs), size = 1, col = "red") +
xlab("Exceedance probability") + ylab("Absolute error") + ylim(-2.1,0) +
theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
legend.position = "none")
# combine
annotate_figure(ggarrange(p1, p2, ncol = 2, labels = "auto"), top = text_grob("The West Brook, Massachusetts"))
Does StreamStats misrepresent among-site variation in flow duration?
p1 <- ggplot() +
geom_line(data = vardat_obs, aes(x = exceedance, y = exdsd), size = 1, color = "grey60") +
geom_line(data = vardat_ss, aes(x = exceedance, y = exdsd), color = "black") +
geom_point(data = vardat_ss, aes(x = exceedance, y = exdsd), color = "black") +
xlab("Exceedance probability") + ylab("Among-site standard deviation in log(Yield, mm)") +
theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p2 <- vardat_obs %>%
left_join(vardat_ss %>% rename(exdsd_ss = exdsd)) %>%
mutate(diff = exdsd_ss - exdsd) %>%
filter(!is.na(diff)) %>%
ggplot() +
geom_line(aes(x = exceedance, y = diff)) +
geom_abline(intercept = 0, slope = 0, linetype = "dashed") +
xlab("Exceedance probability") + ylab("Difference") +
theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
annotate_figure(ggarrange(p1, p2, ncol = 2, labels = "auto"), top = text_grob("The West Brook, Massachusetts"))
For the Flathead (Big Creek), plot observed and StreamStats mean monthly flow/yield and calculate absolute error
# set up
vars <- unique(montana$StatName)[grep("Mean Flow", unique(montana$StatName))]
sites <- c("Hallowat Creek NWIS", "HallowattCreekLower", "WernerCreek", "LangfordCreekUpper", "Big Creek NWIS", "McGeeCreekLower")
preds_list <- list()
obs_list <- list()
hull_list <- list()
join_list <- list()
# calcualate
for (i in 1:length(sites)) {
# filter observed data
obs <- dat_clean %>%
filter(site_name == sites[i]) %>%
mutate(MonthName = factor(MonthName, levels = c("Oct", "Nov", "Dec", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")))
# calculate monthly means
obs_mon <- obs %>%
group_by(site_name, WaterYear, MonthName) %>%
summarize(logYield = mean(logYield)) %>%
ungroup() %>%
mutate(MonthName = factor(MonthName, levels = c("Oct", "Nov", "Dec", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")),
WaterYear = factor(WaterYear)) %>%
complete(site_name, WaterYear, MonthName)
# get monthly min/max for ribbon
hull <- obs_mon %>%
group_by(site_name, MonthName) %>%
summarize(min_logYield = min(logYield, na.rm = TRUE), max_logYield = max(logYield, na.rm = TRUE)) %>%
ungroup()
# get StreamStats mean monthly flow
preds <- streamstats %>%
filter(site_name == sites[i], StatName %in% vars, !is.na(AreaSqMi)) %>%
mutate(MonthName = substr(StatName, 1, nchar(StatName)-10),
Month = parse_number(StatLabel)) %>%
mutate(MonthName = factor(recode(MonthName, "January" = "Jan", "February" = "Feb", "March" = "Mar", "April" = "Apr", "June" = "Jun", "July" = "Jul", "August" = "Aug", "September" = "Sep", "October" = "Oct", "November" = "Nov", "December" = "Dec"), levels = c("Oct", "Nov", "Dec", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep"))) %>%
mutate(flow_cms = Value*0.02831683199881, area_sqkm = AreaSqMi*2.58999)
preds <- add_daily_yield(data = preds %>% select(site_id, site_name, area_sqkm, MonthName, Month, flow_cms), values = flow_cms, basin_area = as.numeric(unique(preds$area_sqkm)))
preds <- preds %>% mutate(logYield = log(Yield_mm))
# join and calculate error
join_list[[i]] <- obs_mon %>%
left_join(preds %>% select(site_name, MonthName, logYield) %>% rename(logYield_ss = logYield)) %>%
mutate(error_abs = logYield_ss - logYield,
error_abs_real = exp(logYield_ss) - exp(logYield),
error_rel = (exp(logYield_ss) - exp(logYield)) / exp(logYield))
# store in list
preds_list[[i]] <- preds
obs_list[[i]] <- obs_mon
hull_list[[i]] <- hull
}
preds <- do.call(rbind, preds_list)
obs_mon <- do.call(rbind, obs_list)
hull <- do.call(rbind, hull_list)
joined <- do.call(rbind, join_list)
joined_mean <- joined %>% group_by(MonthName) %>% summarise(error_abs = mean(error_abs, na.rm = TRUE))
# tibble for site labels
siteslabs <- tibble(site_name = sites, site_lab = c("Hallowatt Upper", "Hallowatt Lower", "Werner", "Langford Upper", "Big NWIS", "McGee Lower"))
# calculate among site variation for StreamStats and observed mean monthly flow (mean across years)
vardat <- preds %>%
group_by(MonthName) %>%
summarize(qsd_ss = sd(logYield)) %>%
ungroup() %>%
left_join(obs_mon %>%
group_by(site_name, MonthName) %>%
summarize(logYield = mean(logYield, na.rm = TRUE)) %>%
ungroup() %>%
group_by(MonthName) %>%
summarize(qsd_obs = sd(logYield)) %>%
ungroup()) %>%
mutate(diff = qsd_ss - qsd_obs, nummon = as.numeric(MonthName))### Colored by site
# observed and StreamStats monthly flow
p1 <- ggplot() +
geom_ribbon(data = hull, aes(ymin = min_logYield, ymax = max_logYield, x = as.numeric(MonthName), fill = site_name), alpha = 0.3) +
geom_line(data = obs_mon, aes(y = logYield, x = as.numeric(MonthName), group = WaterYear, color = site_name)) +
geom_point(data = obs_mon, aes(y = logYield, x = as.numeric(MonthName), group = WaterYear, shape = WaterYear, color = site_name)) +
geom_line(data = preds, aes(y = logYield, x = as.numeric(MonthName), group = site_name), color = "black") +
geom_point(data = preds, aes(y = logYield, x = as.numeric(MonthName)), color = "black") +
geom_text(data = siteslabs, aes(x = -Inf, y = Inf, label = site_lab), vjust = 1.5, hjust = -0.05, size = 3) +
facet_wrap(~site_name, ncol = 2) +
theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = "none",
strip.text.x = element_blank(), strip.background = element_blank()) +
ylab("Monthly mean log(Yield)") + xlab("Month") +
scale_x_continuous(breaks = 1:12, labels = c("O", "N", "D", "J", "F", "M", "A", "M", "J", "J", "A", "S"))
# absolute error by month
p2 <- ggplot() +
geom_line(data = joined, aes(x = as.numeric(MonthName), y = error_abs, shape = WaterYear, color = site_name)) +
geom_point(data = joined, aes(x = as.numeric(MonthName), y = error_abs, shape = WaterYear, color = site_name)) +
geom_line(data = joined_mean, aes(x = as.numeric(MonthName), y = error_abs), size = 1) +
geom_abline(intercept = 0, slope = 0, linetype = "dashed") +
xlab("Month") + ylab("Absolute error") +
scale_x_continuous(breaks = 1:12, labels = c("O", "N", "D", "J", "F", "M", "A", "M", "J", "J", "A", "S")) +
theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
legend.position = "none")
# combine
annotate_figure(ggarrange(p1, p2, ncol = 2, labels = "auto"), top = text_grob("North Fork Flathead River, Montana"))
### No color
# observed and StreamStats monthly flow
p1 <- ggplot() +
geom_ribbon(data = hull, aes(ymin = min_logYield, ymax = max_logYield, x = as.numeric(MonthName)), fill = "grey80") +
geom_line(data = obs_mon, aes(y = logYield, x = as.numeric(MonthName), group = WaterYear)) +
geom_point(data = obs_mon, aes(y = logYield, x = as.numeric(MonthName), group = WaterYear, shape = WaterYear)) +
geom_line(data = preds, aes(y = logYield, x = as.numeric(MonthName), group = site_name), color = "red") +
geom_point(data = preds, aes(y = logYield, x = as.numeric(MonthName)), color = "red") +
geom_text(data = siteslabs, aes(x = -Inf, y = Inf, label = site_lab), vjust = 1.5, hjust = -0.05, size = 3) +
facet_wrap(~site_name, ncol = 2) +
theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = "none",
strip.text.x = element_blank(), strip.background = element_blank()) +
ylab("Monthly mean log(Yield)") + xlab("Month") +
scale_x_continuous(breaks = 1:12, labels = c("O", "N", "D", "J", "F", "M", "A", "M", "J", "J", "A", "S"))
# absolute error by month
p2 <- ggplot() +
geom_line(data = joined, aes(x = as.numeric(MonthName), y = error_abs, group = interaction(site_name, WaterYear))) +
geom_point(data = joined, aes(x = as.numeric(MonthName), y = error_abs, shape = WaterYear)) +
geom_line(data = joined_mean, aes(x = as.numeric(MonthName), y = error_abs), size = 1, col = "red") +
geom_abline(intercept = 0, slope = 0, linetype = "dashed") +
xlab("Month") + ylab("Absolute error") +
scale_x_continuous(breaks = 1:12, labels = c("O", "N", "D", "J", "F", "M", "A", "M", "J", "J", "A", "S")) +
theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
legend.position = "none")
# combine
annotate_figure(ggarrange(p1, p2, ncol = 2, labels = "auto"), top = text_grob("North Fork Flathead River, Montana"))
Does StreamStats misrepresent among-site variation in mean monthly yield?
p1 <- vardat %>%
ggplot() +
geom_line(aes(x = nummon, y = qsd_obs), size = 1, color = "grey60") +
geom_line(aes(x = nummon, y = qsd_ss), color = "black") +
geom_point(aes(x = nummon, y = qsd_ss), color = "black") +
xlab("Month") + ylab("Among-site standard deviation in log(Yield, mm)") +
scale_x_continuous(breaks = 1:12, labels = c("O", "N", "D", "J", "F", "M", "A", "M", "J", "J", "A", "S")) +
theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p2 <- vardat %>%
ggplot() +
geom_line(aes(x = nummon, y = diff)) +
xlab("Month") + ylab("Difference") +
geom_abline(intercept = 0, slope = 0, linetype = "dashed") +
scale_x_continuous(breaks = 1:12, labels = c("O", "N", "D", "J", "F", "M", "A", "M", "J", "J", "A", "S")) +
theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
annotate_figure(ggarrange(p1, p2, ncol = 2, labels = "auto"), top = text_grob("North Fork Flathead River, Montana"))
Pull streamflow predictions from Montana Climate Office’s (University of Montana) Streamflow API. The finest spatial resolution of streamflow predictions is HUC-10. This means that all sites within subbasins have the same predicted streamflow. This makes we wonder how relevant/useful these comparisons are, beyond g/G comparisons shown for objective 1.
Get HUC-10 watershed codes
myhucs <- c()
for (i in 1:dim(siteinfo_sp)[1]) { myhucs[i] <- get_huc(AOI = siteinfo_sp[i,], type = "huc10")$huc10 }
siteinfo <- siteinfo %>% mutate(huc10 = myhucs)
myhucs <- unique(myhucs)
#length(myhucs)Query UM Climatology
request = httr::GET(
# can replace this with /predictions/raw, the only query parameter that isn't shared is aggregations.
"https://data.climate.umt.edu/streamflow-api/predictions/",
query = list(
locations = paste(myhucs, collapse = ","),
date_start = "2015-01-01",
date_end = "2025-01-01",
aggregations = "mean",
as_csv = TRUE,
units = "mm"
)
)
umpreds <- httr::content(request)
umpreds <- umpreds %>% rename(huc10 = location)
print(umpreds)
write_csv(umpreds, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/UM_Climatology_PredictedQ.shp")Load UM Climatrology predicted flow data
umpreds <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/CompareModeledQ/UM_Climatology_PredictedQ.shp")
umpreds# A tibble: 58,464 × 5
huc10 version date metric value
<chr> <chr> <date> <chr> <dbl>
1 0108020106 vPUB2025 2015-01-01 mean 2.14
2 0108020106 vPUB2025 2015-01-02 mean 1.86
3 0108020106 vPUB2025 2015-01-03 mean 1.93
4 0108020106 vPUB2025 2015-01-04 mean 4.00
5 0108020106 vPUB2025 2015-01-05 mean 4.04
6 0108020106 vPUB2025 2015-01-06 mean 3.07
7 0108020106 vPUB2025 2015-01-07 mean 2.43
8 0108020106 vPUB2025 2015-01-08 mean 2.16
9 0108020106 vPUB2025 2015-01-09 mean 2.03
10 0108020106 vPUB2025 2015-01-10 mean 1.90
# ℹ 58,454 more rows
Join to observed data
dat_umpred <- dat_clean %>%
left_join(siteinfo %>% select(site_name, huc10)) %>%
left_join(umpreds %>% select(huc10, date, value)) Plot all time series data
umpreds %>%
ggplot(aes(x = date, y = value)) +
geom_line() +
facet_wrap(~huc10, scales = "free_y")
Compare predicted (red) and observed (black) streamflow data for select sites/basins. For some sites, modeled flow captures the general patterns/shapes of observed hydrographs well, but accuracy is reduced at fine temporal resolutions. For other sites, modeled flow fails to capture both the general and finer resolution asepcts of observed data.
dat_umpred %>% filter(site_name == "Jimmy Brook") %>% select(date, Yield_mm, value) %>% dygraph() %>% dySeries("Yield_mm", color = "black") %>% dySeries("value", color = "red") %>% dyRangeSelector() %>% dyAxis("y", label = "Yield (mm)") dat_umpred %>% filter(site_name == "Staunton River 06") %>% select(date, Yield_mm, value) %>% dygraph() %>% dySeries("Yield_mm", color = "black") %>% dySeries("value", color = "red") %>% dyRangeSelector() %>% dyAxis("y", label = "Yield (mm)") dat_umpred %>% filter(site_name == "Hallowat Creek NWIS") %>% select(date, Yield_mm, value) %>% dygraph() %>% dySeries("Yield_mm", color = "black") %>% dySeries("value", color = "red") %>% dyRangeSelector() %>% dyAxis("y", label = "Yield (mm)") dat_umpred %>% filter(site_name == "SF Spread Creek Lower") %>% select(date, Yield_mm, value) %>% dygraph() %>% dySeries("Yield_mm", color = "black") %>% dySeries("value", color = "red") %>% dyRangeSelector() %>% dyAxis("y", label = "Yield (mm)") dat_umpred %>% filter(site_name == "Dugout Creek") %>% select(date, Yield_mm, value) %>% dygraph() %>% dySeries("Yield_mm", color = "black") %>% dySeries("value", color = "red") %>% dyRangeSelector() %>% dyAxis("y", label = "Yield (mm)") dat_umpred %>% filter(site_name == "Donner Blitzen ab Indian NWIS") %>% select(date, Yield_mm, value) %>% dygraph() %>% dySeries("Yield_mm", color = "black") %>% dySeries("value", color = "red") %>% dyRangeSelector() %>% dyAxis("y", label = "Yield (mm)") NSE scores and categories per Moriasi et al. “Model evaluation guidelines for systematic quantification of accuracy in watershed simulations.” Transactions of the ASABE 50.3 (2007): 885-900.
| Category | NSE range |
|---|---|
| Very good | 0.75-1.00 |
| Good | 0.65-0.75 |
| Satisfactory | 0.50-0.65 |
| Unsatisfactory | <0.50 |
Note that there is no effort to ensure consistant data availability among sites/basins/years. Would it be better to restrict these?
NSE by subbasin for all available data.
dat_umpred %>%
group_by(site_name, subbasin) %>%
summarize(nse = NSE(sim = log(value), obs = log(Yield_mm))) %>%
ungroup() %>%
mutate(subbasin = factor(subbasin, levels = c("West Brook", "Paine Run", "Staunton River", "Big Creek", "Coal Creek", "McGee Creek", "Snake River", "Shields River", "Duck Creek", "Donner Blitzen"))) %>%
ggplot(aes(x = subbasin, y = nse)) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 0.50, alpha = 0.3, fill = "red") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.50, ymax = 0.65, alpha = 0.3, fill = "orange") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.65, ymax = 0.75, alpha = 0.3, fill = "yellow") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.75, ymax = 1.00, alpha = 0.3, fill = "green") +
geom_boxplot(fill = "grey", outlier.shape = NA) +
geom_jitter(height = 0, width = 0.2, shape = 1) +
geom_abline(slope = 0, intercept = 1, color = "black", linetype = "dashed") +
xlab("Sub-basin") + ylab("Nash-Sutcliffe efficiency") + ggtitle("All data") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.4)) +
scale_y_continuous(limits = c(-8.5,1), expand = c(0,0))
NSE by subbasin, summer (July-August) only, all available years.
dat_umpred %>%
filter(Month %in% c(7:9)) %>%
group_by(site_name, subbasin) %>%
summarize(nse = NSE(sim = log(value), obs = log(Yield_mm))) %>%
ungroup() %>%
mutate(subbasin = factor(subbasin, levels = c("West Brook", "Paine Run", "Staunton River", "Big Creek", "Coal Creek", "McGee Creek", "Snake River", "Shields River", "Duck Creek", "Donner Blitzen"))) %>%
ggplot(aes(x = subbasin, y = nse)) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 0.50, alpha = 0.3, fill = "red") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.50, ymax = 0.65, alpha = 0.3, fill = "orange") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.65, ymax = 0.75, alpha = 0.3, fill = "yellow") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.75, ymax = 1.00, alpha = 0.3, fill = "green") +
geom_boxplot(fill = "grey", outlier.shape = NA) +
geom_jitter(height = 0, width = 0.2, shape = 1) +
geom_abline(slope = 0, intercept = 1, color = "black", linetype = "dashed") +
xlab("Sub-basin") + ylab("Nash-Sutcliffe efficiency") + ggtitle("July-September") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.4)) +
scale_y_continuous(limits = c(-12.5,1), expand = c(0,0))
How does NSE vary through time?
NSE by subbasin and month, pooled across all available years
dat_umpred %>%
group_by(site_name, subbasin, MonthName) %>%
summarize(nse = NSE(sim = log(value), obs = log(Yield_mm))) %>%
ungroup() %>%
mutate(subbasin = factor(subbasin, levels = c("West Brook", "Paine Run", "Staunton River", "Big Creek", "Coal Creek", "McGee Creek", "Snake River", "Shields River", "Duck Creek", "Donner Blitzen")),
MonthName = factor(MonthName, levels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"))) %>%
ggplot(aes(x = MonthName, y = nse)) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 0.50, alpha = 0.3, fill = "red") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.50, ymax = 0.65, alpha = 0.3, fill = "orange") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.65, ymax = 0.75, alpha = 0.3, fill = "yellow") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.75, ymax = 1.00, alpha = 0.3, fill = "green") +
geom_point(aes(group = site_name)) +
geom_abline(slope = 0, intercept = 1, color = "black", linetype = "dashed") +
scale_x_discrete(labels = c("Jan" = "J", "Feb" = "F", "Mar" = "M", "Apr" = "A", "May" = "M", "Jun" = "J", "Jul" = "J", "Aug" = "A", "Sep" = "S", "Oct" = "O", "Nov" = "N", "Dec" = "D")) +
xlab("Month") + ylab("Nash-Sutcliffe efficiency") + ggtitle("All data") +
facet_wrap(~subbasin, scales = "free_y") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
scale_y_continuous(expand = c(0,0))
For the West Brook, show NSE by time and site, no pooling.
dat_umpred %>%
mutate(yearmonth = floor_date(date, "month")) %>%
filter(subbasin == "West Brook") %>%
group_by(site_name, subbasin, yearmonth) %>%
summarize(nobs = n(), nse = NSE(sim = log(value), obs = log(Yield_mm))) %>%
ungroup() %>%
filter(nobs >= 25) %>%
ggplot(aes(x = yearmonth, y = nse)) +
# annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 0.50, alpha = 0.3, fill = "red") +
# annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.50, ymax = 0.65, alpha = 0.3, fill = "orange") +
# annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.65, ymax = 0.75, alpha = 0.3, fill = "yellow") +
# annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.75, ymax = 1.00, alpha = 0.3, fill = "green") +
geom_point(aes(color = site_name)) +
geom_line(aes(color = site_name)) +
geom_abline(slope = 0, intercept = 1, color = "black", linetype = "dashed") +
xlab("Time") + ylab("Nash-Sutcliffe efficiency") + ggtitle("West Brook (truncated y-axis limits)") +
#facet_wrap(~subbasin, scales = "free") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
ylim(-50,1)
Often, ecological studyies use flow data aggregated at coarser temporal resolutions. How does efficiency change with the time scale of aggregation? Do flow predictions become more accurate when aggregated from daily to weekly or monthly means?
Aggregate data (or not), calculate NSE for each site, and combine.
daily <- dat_umpred %>%
group_by(site_name, subbasin) %>%
summarize(nse = NSE(sim = log(value), obs = log(Yield_mm))) %>%
ungroup() %>%
mutate(timescale = "day")
weekly <- dat_umpred %>%
mutate(date = floor_date(date, "week")) %>%
group_by(site_name, subbasin, date) %>%
summarise(nobs = n(), Yield_mm = mean(Yield_mm), value = mean(value)) %>%
ungroup() %>%
filter(nobs == 7) %>%
group_by(site_name, subbasin) %>%
summarize(nse = NSE(sim = log(value), obs = log(Yield_mm))) %>%
ungroup() %>%
mutate(timescale = "week")
monthly <- dat_umpred %>%
mutate(date = floor_date(date, "month")) %>%
group_by(site_name, subbasin, date) %>%
summarise(nobs = n(), Yield_mm = mean(Yield_mm), value = mean(value)) %>%
ungroup() %>%
filter(nobs >= 27) %>%
group_by(site_name, subbasin) %>%
summarize(nse = NSE(sim = log(value), obs = log(Yield_mm))) %>%
ungroup() %>%
mutate(timescale = "month")
timescale <- bind_rows(daily, weekly, monthly) %>%
mutate(timescale = factor(timescale, levels = c("day", "week", "month"))) Plot all sites
timescale %>%
ggplot(aes(x = timescale, y = nse, group = site_name)) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 0.50, alpha = 0.3, fill = "red") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.50, ymax = 0.65, alpha = 0.3, fill = "orange") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.65, ymax = 0.75, alpha = 0.3, fill = "yellow") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.75, ymax = 1.00, alpha = 0.3, fill = "green") +
geom_point() +
geom_line() +
xlab("Time scale") + ylab("Nash-Sutcliffe efficiency") +
theme_bw() + ylim(-6,1) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
scale_y_continuous(limits = c(-14,1), expand = c(0,0))
Plot all sites, facet by subbasin
timescale %>%
mutate(subbasin = factor(subbasin, levels = c("West Brook", "Paine Run", "Staunton River", "Big Creek", "Coal Creek", "McGee Creek", "Snake River", "Shields River", "Duck Creek", "Donner Blitzen"))) %>%
ggplot(aes(x = timescale, y = nse, group = site_name)) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 0.50, alpha = 0.3, fill = "red") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.50, ymax = 0.65, alpha = 0.3, fill = "orange") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.65, ymax = 0.75, alpha = 0.3, fill = "yellow") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.75, ymax = 1.00, alpha = 0.3, fill = "green") +
geom_point() +
geom_line() +
xlab("Time scale") + ylab("Nash-Sutcliffe efficiency") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
facet_wrap(~subbasin, scales = "free_y")
Generally, aggregating daily flow predictions to weekly or monthly means does not substantially change the accuracy of predicted data. However, there is a slight trend for accuracy (NSE) to increase with temporal aggregation for sites at which daily predictions are already fairly accurate. In contrast, for sites for which daily predictions are not accurate, temporal aggregation further decreases accuracy.